Building Energy Spatial Viz

library(tidyverse)
library(sf) 
library(ggpubr)
# Read the RECS and ACS objects from their RDS files
recs <- readRDS(file = "../data/recs_2015_cleaned.RDS")
acs <- readRDS(file = "../data/acs_2015_cleaned.RDS")
# One centroid point per ACS feature, used to anchor graduated symbols;
# for multi-part features the centroid of the largest polygon is used
centers <- acs %>%
  st_centroid(of_largest_polygon = TRUE)
Warning: st_centroid assumes attributes are constant over geometries
# Number of RECS records for each region / housing type combination
recs_housing_type <- count(recs, region_name, housing_type)

# Get one geometry per region from the ACS data to join to RECS.
# summarise() on a grouped sf object unions the geometries within each group,
# so no summary function is needed. The previous summarise_all(mean) computed
# attribute means that were immediately discarded and also tried to average the
# geometry column itself, which raised "argument is not numeric or logical"
# warnings for every region.
region_geometry <- acs %>%
  group_by(region) %>%
  summarise() %>%
  select(region, geometry)
Warning: There were 4 warnings in `summarise()`.
The first warning was:
ℹ In argument: `geometry = (function (x, ...) ...`.
ℹ In group 1: `region = "Midwest"`.
Caused by warning in `mean.default()`:
! argument is not numeric or logical: returning NA
ℹ Run `dplyr::last_dplyr_warnings()` to see the 3 remaining warnings.
# Join ACS geometry to RECS counts by region, then express each housing type
# as a share of all RECS records in its region.
#   totalHousing   - total count (n) across housing types within the region
#   typeProportion - n / totalHousing
# The grouping is only needed for the per-region total, so it is dropped
# afterwards to keep later verbs from silently operating per region.
recs_housing_spatial <- region_geometry %>%
  left_join(recs_housing_type, join_by(region == region_name)) %>%
  group_by(region) %>%
  mutate(totalHousing = sum(n),
         typeProportion = n / totalHousing) %>%
  ungroup()

What is the RECS distribution of types of housing by region?

# Map a RECS variable as a choropleth with the ACS feature outlines on top.
#
# Arguments:
#   df           sf data frame whose geometries are filled
#   var          unquoted column (or expression) of `df` mapped to fill
#   map_title    plot title
#   legend_title title of the fill legend
#   direction    passed to scale_fill_distiller(); 1 keeps the ColorBrewer
#                order, -1 reverses it
#   palette      ColorBrewer palette name passed to scale_fill_distiller();
#                defaults to "Purples", the value that used to be hard-coded
#
# Returns a ggplot object. Reads the global `acs` for the outline layer.
map_recs_var <- function(df, var, map_title, legend_title, direction = 1,
                         palette = "Purples") {
  ggplot() +
    geom_sf(data = df, aes(fill = {{ var }})) +
    scale_fill_distiller(palette = palette,
                         direction = direction) +
    # unfilled ACS outlines drawn over the filled polygons
    geom_sf(data = acs,
            color = "darkgray",
            fill = NA) +
    labs(title = map_title,
         fill = legend_title) +
    ggthemes::theme_map()
}

# Map the regional proportion of one type of home.
#
# Arguments:
#   unit_type    value of `housing_type` to keep from recs_housing_spatial
#   map_title    plot title
#   legend_title title of the fill legend; previously this argument was
#                accepted but ignored in favour of a hard-coded string, which
#                is now its default so existing calls render the same
#   direction    forwarded to map_recs_var()
#
# Returns the ggplot object built by map_recs_var(). Reads the global
# `recs_housing_spatial`.
map_recs_unit_type <- function(unit_type, map_title,
                               legend_title = "Proportion of all \nhousing in region",
                               direction = 1) {
  df <- recs_housing_spatial %>%
    filter(housing_type == unit_type)

  map_recs_var(df, typeProportion, map_title, legend_title, direction)
}
# One regional map per housing unit type; each call auto-prints its plot
map_recs_unit_type(
  unit_type = "Single family attached",
  map_title = "Proportion of Single Unit Attached Homes"
)

map_recs_unit_type(
  unit_type = "Single family detached",
  map_title = "Proportion of Single Unit Detached Homes"
)

map_recs_unit_type(
  unit_type = "Apt small building",
  map_title = "Proportion of 2-4 Unit Homes by Region"
)

map_recs_unit_type(
  unit_type = "Apt large building",
  map_title = "Proportion of 5+ Unit Homes by Region"
)

Single-unit detached homes dominate by region, although less so in New England, likely due to the presence of more compact cities like New York City. Single-unit attached homes and small apartments make up relatively larger proportions in New England, although they are less common overall. Large apartments are common in New England and, interestingly, also in the West — perhaps due to compact cities in California?

What is the ACS estimate for distribution of housing type by state?

# Map an ACS variable at the state level.
#
# Arguments:
#   var          unquoted column or expression of `acs` mapped to fill
#   map_title    plot title
#   palette      ColorBrewer palette name passed to scale_fill_distiller()
#   direction    passed to scale_fill_distiller(); 1 keeps the ColorBrewer
#                order, -1 reverses it
#   caption      plot caption; NA (the default) or NULL means no caption
#   legend_title title of the fill legend; defaults to "Proportion", the
#                value that used to be hard-coded
#
# Returns a ggplot object. Reads the global `acs`.
map_acs_var <- function(var, map_title, palette, direction = 1, caption = NA,
                        legend_title = "Proportion") {
  # NA stays the default for backward compatibility, but NULL is the
  # documented way to tell labs() to omit a label, so translate it here.
  if (length(caption) == 1 && is.na(caption)) {
    caption <- NULL
  }

  ggplot() +
    geom_sf(data = acs, aes(fill = {{ var }})) +
    scale_fill_distiller(palette = palette,
                         direction = direction) +
    labs(title = map_title,
         fill = legend_title,
         caption = caption) +
    ggthemes::theme_map()
}
# State-level maps of each housing unit type as a share of total_units
single_unit_att <- map_acs_var(
  var = unit_1_attached / total_units,
  map_title = "Proportion of Single Unit Attached Homes",
  palette = "Oranges"
)

single_unit_det <- map_acs_var(
  var = unit_1_detached / total_units,
  map_title = "Proportion of Single Unit Detached Homes",
  palette = "Oranges"
)

small_apt <- map_acs_var(
  var = units_2_4 / total_units,
  map_title = "Proportion of Small Apartments (2-4 Unit)",
  palette = "Oranges"
)

# only the last panel carries the data-source caption
large_apt <- map_acs_var(
  var = units_5_plus / total_units,
  map_title = "Proportion of Large Apartments (5+ Unit)",
  palette = "Oranges",
  caption = "Data source: 2011-2015 5-year ACS, US Census Bureau"
)

# Arrange the four maps in a grid: detached, attached, small apt, large apt
units_by_state <- ggarrange(
  single_unit_det, single_unit_att, small_apt, large_apt
)

# Add a shared bold heading above the grid
annotate_figure(
  p = units_by_state,
  top = text_grob(
    label = "Frequency of Housing Types by State, 2011-2015",
    face = "bold",
    size = 18
  )
)

Figure 1: Single-unit houses, unsurprisingly, also dominate by state. They seem least prevalent in New York and Massachusetts, and make up a somewhat lower proportion in Illinois, California, Nevada, and North Dakota. Pennsylvania and Maryland contain an intriguingly high proportion of single-unit attached houses. Over a fifth of Rhode Island’s housing is small apartments; New York, Connecticut, and New Jersey also have high proportions. Although not visible, around half of D.C.’s housing is large apartments, followed by New York, supporting the idea that cities tend to contain larger unit housing.

# Who rents vs owns by state?

# Share of tenureTotal that is owned
map_acs_var(
  var = tenureOwned / tenureTotal,
  map_title = "Who Owns Housing?",
  palette = "Greens",
  caption = "Data source: 2011-2015 5-year ACS, US Census Bureau"
)

# Share of tenureTotal that is rented
map_acs_var(
  var = tenureRented / tenureTotal,
  map_title = "Who Rents Housing?",
  palette = "Greens",
  caption = "Data source: 2011-2015 5-year ACS, US Census Bureau"
)

Generally, owning is more common than renting. The exceptions to this rule are New York, California, and Nevada — states with big cities.

What’s the median age of buildings by state?

# med_year_built is a year rather than a share, so replace the "Proportion"
# legend title that map_acs_var() applies by default. The palette is reversed
# (direction = -1).
map_acs_var(med_year_built, "Median Building Age, 2011-2015", "BuPu", -1, caption="Data source: 2011-2015 5-year ACS, US Census Bureau") +
  labs(fill = "Median Year Built")

Nevada has the newest buildings. Houses in the South and West generally tend to be newer. Houses in New York are oldest, along with several states in New England and the Midwest — notably Illinois, Iowa, and Ohio.

# Add region geometry to a summary of a RECS variable and map it.
#
# Arguments:
#   df           summary table with a `region_name` column
#   var          unquoted column of `df` mapped to fill
#   map_title    plot title
#   legend_title title of the fill legend
#   palette      ColorBrewer palette name for the fill scale
#
# Returns a ggplot object. Reads the global `region_geometry`.
geom_map_recs <- function(df, var, map_title, legend_title, palette) {
  data_geom <- region_geometry %>%
    left_join(df, join_by(region == region_name))

  # `palette` used to be passed positionally into map_recs_var()'s
  # `direction` argument, so the requested palette was silently ignored and
  # every map came out in the default palette. Build the base map with the
  # default direction and set the palette explicitly instead.
  base_map <- map_recs_var(data_geom, {{ var }}, map_title, legend_title)

  # Swapping the fill scale makes ggplot2 announce that an existing scale is
  # being replaced; that is intended here, so only that message is silenced.
  suppressMessages(
    base_map + scale_fill_distiller(palette = palette, direction = 1)
  )
}

# Mean of one RECS variable within each level of a grouping column.
#
# Arguments:
#   grouping  unquoted column of `recs` to group by
#   mean_var  unquoted column of `recs` to average
#
# Returns one row per group with the grouping column and a `mean` column.
# Reads the global `recs`.
calc_recs_summary <- function(grouping, mean_var) {
  grouped_recs <- group_by(recs, {{ grouping }})
  summarise(grouped_recs, mean = mean({{ mean_var }}))
}

What regions’ houses have air conditioning?

# Share of RECS records in each region that report air conditioning.
#   total             - record count across all aircon values in the region
#   proportion_aircon - n / total
# Only the aircon == "Yes" rows are kept. The grouping is dropped once the
# per-region totals are computed so the result is a plain, ungrouped table.
recs_aircon <- recs %>%
  count(region_name, aircon) %>%
  group_by(region_name) %>%
  mutate(total = sum(n),
         proportion_aircon = n / total) %>%
  ungroup() %>%
  filter(aircon == "Yes")

# Regional choropleth of the air conditioning share
# (legend title typo "Propotion" corrected)
aircon_choropleth <- geom_map_recs(
  recs_aircon,
  proportion_aircon,
  "Proportion of Homes with Air Conditioning",
  "Proportion",
  "Blues"
)

aircon_choropleth

Air conditioning is nearly universal in the South, and is almost as common in the Midwest. Its prevalence drops to around 80% in New England, and down to 75% in the West.

Which regions use more energy?

# Regional mean of TOTALBTU
recs_tot_energy <- calc_recs_summary(
  grouping = region_name,
  mean_var = TOTALBTU
)

# Choropleth of the `mean` column produced above
tot_energy_choropleth <- geom_map_recs(
  df = recs_tot_energy,
  var = mean,
  map_title = "Mean Energy Use by Region in 2015",
  legend_title = "Mean Total Energy",
  palette = "Reds"
)

tot_energy_choropleth

New England and the Midwest overall use the most energy, followed by the South, with the West using least.

Which regions use more electricity?

# Regional mean of BTUEL
recs_btu <- calc_recs_summary(region_name, BTUEL)

# Choropleth of the `mean` column produced above
# (title typo "Electrity" corrected)
btu_choropleth <- geom_map_recs(
  recs_btu,
  mean,
  "Mean Electricity Usage by Region in 2015",
  "Mean Electricity \nUse (thousand \nBtu)",
  "Purples"
)

btu_choropleth

The South uses noticeably more electricity than other regions, consistent with its near-universal air conditioning.

What about natural gas?

# Regional mean of BTUNG
recs_btn <- calc_recs_summary(
  grouping = region_name,
  mean_var = BTUNG
)

# Choropleth of the `mean` column produced above
btn_choropleth <- geom_map_recs(
  df = recs_btn,
  var = mean,
  map_title = "Mean Natural Gas Usage by Region in 2015",
  legend_title = "Mean Natural \nGas Use\n(thousand Btu)",
  palette = "Oranges"
)

btn_choropleth

The Midwest uses notably more natural gas than other regions, followed by New England and the West.

Multivariate Analysis

# Overlay the ACS centroids on the natural gas map as red points whose size
# encodes med_year_built
btn_choropleth +
  geom_sf(
    data = centers,
    mapping = aes(size = med_year_built),
    colour = "red"
  ) +
  scale_size(range = c(0.5, 6)) +
  labs(size = "Median Year Built")

# Regional mean of TOTSQFT_EN
recs_sqft <- calc_recs_summary(
  grouping = region_name,
  mean_var = TOTSQFT_EN
)

# Choropleth of the `mean` column produced above
sqft_choropleth <- geom_map_recs(
  df = recs_sqft,
  var = mean,
  map_title = "Median Building Age and Square Footage of Residential Buildings by Region in 2015",
  legend_title = "Mean Square Feet",
  palette = "Purples"
)

sqft_choropleth

# Overlay the ACS centroids on the square footage map as blue points whose
# size encodes med_year_built, and credit both data sources
# (caption typo "Admininstration" corrected)
sqft_choropleth +
  geom_sf(data = centers, aes(size = med_year_built), color = "blue") +
  scale_size(range = c(.5, 6)) +
  labs(size = "Median Year Built",
       caption = "Data sources: 2011-2015 ACS, US Census Bureau; 2015 RECS, US Energy Information Administration")

Figure 2: Midwestern homes tend to be larger, followed by New England homes, which also tend to be older. The South and West have relatively newer and smaller homes.